* ---------------------------------------------------------------------------
* Session setup: wipe memory, disable output paging, and (re)open a text log.
* ---------------------------------------------------------------------------
clear all
set more off

capture log close
log using an_te_model_pat, text replace

* Load the cleaned firm-year sample and discard rows flagged by patold.
use sample_clean, clear
drop if patold

* Later lag logic ([_n-1]) depends on rows being ordered by firm and year.
sort id year

* Replace the stock tfp measure with the tfp_acf versions.
* NOTE(review): any existing dtfp variable is not rebuilt here, so it may still
* be based on the dropped tfp measure; confirm against the data-cleaning step.
drop tfp
rename (tfp_acf tfp_acf_i) (tfp tfpi)

* Within-firm first difference of tfpi; missing for each firm's first row.
by id (year): generate dtfpi = tfpi - tfpi[_n-1]

* ---------------------------------------------------------------------------
* Covariates used in the propensity-score and outcome models.
* ---------------------------------------------------------------------------

* Exporter dummy. Stata treats missing as larger than any number, so a bare
* (exports>0) would code firms with missing exports as exporters. Leave the
* dummy missing in that case so those rows drop out of estimation instead.
gen dexport = exports>0 if !missing(exports)
gen loghhi = log(hhi)
gen logms = log(share4)
gen logfs = log(shfor)

* log(shfor) is missing when shfor is zero, negative or missing. Keep those rows
* by setting logfs to 0 and flagging them with mislogfs.
gen mislogfs = (logfs==.)
replace logfs = 0 if logfs==.

* Year dummies y_1, y_2, ... in ascending year order; y_3-y_10 enter the models.
tab year,gen(y_)
global yrdum y_3-y_10

* Industry dummies from indold. Industries 3, 14, 15 and 17 (by tabulation
* order) are removed from the sample; rows with missing indold have missing
* dummies, which evaluate as true here, so they are dropped as well.
tab indold,gen(i_)
global inddum i_2 i_4-i_13 i_16 i_18
drop if i_14 | i_15 | i_17 | i_3

* Variable lists shared by all models below.
global dummies $inddum $yrdum 
global vars foreign indiv dexport santiago logms loghhi logfs mislogfs
global depvars logs loge loga logm tfp tfpi

* ---------------------------------------------------------------------------
* For every outcome in $depvars build:
*   <outcome>l     one-year within-firm lag of the level
*   pre_d<outcome> firm mean of the growth variable over years up to and
*                  including the current year
*   post_d<outcome> firm mean of the growth variable from the current year on
* The growth variables (prefix d) must already exist in the data.
*
* The window is selected with an if-qualifier. Multiplying by a 0/1 indicator
* and then blanking zeros leaves out-of-window rows whose growth is exactly 0
* in the average, which biases the mean towards zero.
* Temporary variables are used for scratch space so that the code cannot
* clash with dataset variables named x or xx.
* ---------------------------------------------------------------------------
quietly foreach var of varlist $depvars {
  gen `var'l = `var'[_n-1] if id==id[_n-1]
  gen pre_d`var' = .
  foreach yr of numlist 1995/2005 {
    tempvar win avg
    * growth inside the window (year <= yr); missing elsewhere
    gen `win' = d`var' if year<=`yr'
    egen `avg' = mean(`win'), by(id)
    * keep the firm-level window mean only on the row for year yr
    replace pre_d`var' = `avg' if year==`yr'
    drop `win' `avg'
  }
}

quietly foreach var of varlist $depvars {
  gen post_d`var' = .
  foreach yr of numlist 2005/1995 {
    tempvar win avg
    * growth inside the window (year >= yr); missing elsewhere
    gen `win' = d`var' if year>=`yr'
    egen `avg' = mean(`win'), by(id)
    replace post_d`var' = `avg' if year==`yr'
    drop `win' `avg'
  }
}

sum pre_d* post_d*

* ---------------------------------------------------------------------------
* Restrict the sample to pre-entry and entry-year observations.
* Everything below compares a row with the previous row, so make sure the
* data are ordered by firm and year first (no-op if already sorted).
* ---------------------------------------------------------------------------
sort id year

* patfirst = 1 in the year a firm switches from patnew==0 to patnew==1.
replace patfirst = patnew & ~patnew[_n-1] & id==id[_n-1]

* Number of years each observation lies before the firm's last observed year.
egen lastyr = max(year),by(id)
gen yearsafter = lastyr-year
sum yearsafter if patnew

* Remove post-entry observations: patnew now and in the firm's previous row.
* The id check keeps a firm's first row from being compared with the last
* row of the preceding firm.
drop if patnew & patnew[_n-1] & id==id[_n-1]
tab year patnew

* Drop each firm's first remaining row (it has no within-firm predecessor).
drop if id~=id[_n-1] 

* ---------------------------------------------------------------------------
* Probit for the probability of patenting (marginal effects reported by
* dprobit), standard errors clustered by firm, followed by a density plot of
* the fitted probabilities for treated and control observations.
* NOTE(review): dprobit is a legacy command; it is kept so that the reported
* output is unchanged.
* ---------------------------------------------------------------------------
dprobit patnew logel pre_dloge $vars $dummies, cluster(id) 
cap drop prob
predict prob

* The graph is saved under the directory held in global GRADIR, which is not
* set in this file and must be defined by the caller. The path is quoted and
* uses a forward slash so that it works on every platform and with directory
* names that contain spaces.
twoway (kdensity prob if patnew) (kdensity prob if ~patnew, lpattern(dash)), ///
       legend( label( 1 "treated") label(  2 "control" ) ) xtitle("propensity score") ///
       title("Patenting probability") saving("${GRADIR}/patprob", replace)

* ---------------------------------------------------------------------------
* Treatment-effect estimates, one pass per outcome in $depvars.
* For each outcome the loop stores two sets of results:
*   reg<outcome>  OLS of post-period growth on patnew and controls
*   ps<outcome>   propensity-score matching estimate (teffects psmatch)
* ---------------------------------------------------------------------------
xtset id year

foreach outcome of varlist $depvars {

  * OLS benchmark with firm-clustered standard errors
  quietly regress post_d`outcome' patnew $vars $dummies, vce(cluster id)
  estimates store reg`outcome'

  * Raw treated/control comparison: count and mean of post-period growth
  tabstat post_d`outcome', by(patnew) statistics(n mean) columns(statistics)

  * Matching on the propensity score. The treatment model uses the lagged
  * level and pre-period growth of the same outcome plus the common controls.
  * Match indices go to obs<outcome>1, obs<outcome>2, ...; observations that
  * violate the overlap tolerance are flagged in nomatch<outcome>.
  teffects psmatch (post_d`outcome')                                   ///
           (patnew `outcome'l pre_d`outcome' $vars $dummies),          ///
           generate(obs`outcome') osample(nomatch`outcome')            ///
           pstolerance(.0000001)
  estimates store ps`outcome'

  * Disabled alternative: nearest-neighbour matching with exact industry match
  *cap teffects nnmatch (post_d`outcome' `outcome'l pre_d`outcome' $vars $dummies) (patnew),        ///
  *         ematch($inddum) dmvariables osample(nomatch`outcome')
  *est store nn`outcome'
}

* ---------------------------------------------------------------------------
* Diagnostics for the matching model. These commands act on the estimation
* results currently in memory, i.e. the last teffects psmatch fit of the loop
* above (the final outcome in $depvars).
* ---------------------------------------------------------------------------
predict pscore, ps

* Compare the matching propensity score with the earlier probit prediction.
* The variable is referenced by its full name: the abbreviation ps is
* ambiguous if any other variable starts with ps, and is rejected altogether
* when variable abbreviation is switched off.
corr pscore prob

* Covariate balance and overlap of the estimated propensity scores.
tebalance summarize
teffects overlap

* ---------------------------------------------------------------------------
* Results tables built from the stored estimates, then close the log.
* NOTE(review): the sfmt() argument %15.5 has no format letter; confirm that
* esttab interprets it as intended.
* ---------------------------------------------------------------------------

* OLS results: treatment dummy and controls only (industry/year dummies hidden)
esttab reg*,                                       ///
       keep(patnew $vars) order(patnew $vars)      ///
       b(%9.4f) se(%9.4f) sfmt(%15.5)              ///
       stats(r2 rmse F df_m N N_clust) star

* Disabled: table for the nearest-neighbour matching estimates
*esttab nn*, b(%9.4f) se(%9.4f) sfmt(%15.5) stats(N n1) star

* Propensity-score matching results
esttab ps*,                                        ///
       b(%9.4f) se(%9.4f) sfmt(%15.5)              ///
       stats(N n1) star

log close



